library(tidyverse)
library(SingleCellExperiment)
library(Statial)
library(spicyR)
library(plotly)
library(ClassifyR)
library(lisaClust)
library(ggsurvfit)
library(ggthemes)
library(DT)
library(htmlwidgets)
library(ggthemes)

# Set ggplot theme
theme_set(theme_classic())

Loading datasets

Dataset 1 - Keren et al

Dataset 2 - Ali et al

The next sections will look at Keren et al.

load("../data/kerenSCE.rda")
#load("../data/aliSCE.rda")


kerenSCE
## class: SingleCellExperiment 
## dim: 48 197678 
## metadata(0):
## assays(1): intensities
## rownames(48): Na Si ... Ta Au
## rowData names(0):
## colnames(197678): 1 2 ... 197677 197678
## colData names(42): x y ... Censored region
## reducedDimNames(0):
## mainExpName: NULL
## altExpNames(0):

Data cleaning

# Removing patients with cold tumour due to small sample size of 5
kerenSCE = kerenSCE[, kerenSCE$tumour_type != "cold"]
kerenSCE$tumour_type = droplevels(kerenSCE$tumour_type)

# Removing patients 22 and 38 because they have missing survival data.
kerenSCE = kerenSCE[, !kerenSCE$imageID %in% c("22", "38")]
kerenSCE$imageID = as.numeric(kerenSCE$imageID)


# Extracting clinical information
clinicalDf = kerenSCE |> 
  colData() |> 
  data.frame() |> 
  select(-c(x, y, CellID, cellType, cellSize, C, tumorYN, tumorCluster, Group, immuneCluster, immuneGroup, region)) |> 
  unique() |> 
  remove_rownames()

Data visualisation

Dimension reduction plot

set.seed(51773)
kerenSCE <- scater::runUMAP(kerenSCE, exprs_values = "intensities", name = "UMAP")

scater::plotReducedDim(kerenSCE, dimred = "UMAP", colour_by = "cellType")

Example image

p = kerenSCE |> 
  colData() |> 
  data.frame() |> 
  filter(imageID == "5") |> 
  ggplot(aes(x = x, y = y, col = cellType)) +
  geom_point(size = 1) +
  ggthemes::scale_colour_tableau( palette = "Tableau 20")


ggplotly(p)

Calculating cell proportions

Proportions

cellProp = spicyR::getProp(
  cells = kerenSCE
)

cellProp |> 
  round(4) |> 
  DT::datatable(options = list(scrollX = TRUE))

Testing across conditions

testProp = spicyR::colTest(kerenSCE,
                condition = "tumour_type",
                feature = "cellType")

testProp |> 
  DT::datatable(options = list(scrollX = TRUE))
cellProp |> 
  rownames_to_column("imageID") |> 
  mutate(imageID = as.numeric(imageID)) |> 
  left_join(clinicalDf) |> 
  ggplot(aes(x = tumour_type, y = `Keratin+Tumour`)) +
  geom_boxplot() +
  labs(y = "Proprotion of Keretin+Tumour cells",
       x = "Tumour Type")

Region analysis

set.seed(51773)

kerenSCE <- lisaClust(
  kerenSCE,
  k = 5,
  spatialCoords = c("x", "y"),
  cellType = "cellType"
)

Visualising regions

Hatching plot

p = hatchingPlot(
  kerenSCE,
  useImages = "5",
  cellType = "cellType",
  spatialCoords = c("x", "y"),
  line.spacing = 41, # spacing of lines
  nbp = 100 # smoothness of lines
)

p +
  ggthemes::scale_colour_tableau( palette = "Tableau 20") +
  guides(fill=guide_legend(ncol=2))

Coloured regions

kerenSCE |> 
  colData() |> 
  data.frame() |> 
  filter(imageID == "5") |> 
  ggplot(aes(x = x, y = y, col = region)) +
  geom_point() +
  ggthemes::scale_colour_tableau() +
  theme_classic()

Region composition

regionMap(kerenSCE,
          cellType = "cellType")

Spatial interactions

A positive value means the two cell types are localised, a negative value means they are dispersed.

interactions = spicyR::getPairwise(
  cells = kerenSCE,
) |> 
  data.frame()

interactions |> 
  round(4) |> 
  DT::datatable( options = list(scrollX = TRUE))
p = kerenSCE |> 
  colData() |> 
  data.frame() |> 
  filter(imageID == "3") |> 
  ggplot(aes(x = x, y = y, col = cellType)) +
  geom_point(size = 1) +
  ggthemes::scale_colour_tableau( palette = "Tableau 20") +
  theme_classic() +
  ggtitle("Image 3")


ggplotly(p)

Check which spatial interactions are associated with survival

The interaction between Keratin+Tumour cells and other immune cells are the most significant spatial relationship which contributes to patient survival.

The CoxPH coefficient is negative, which means when Keratin+Tumour and other immune cells are localising, the patient tends to live longer.

# Create survival object from data 
survivalOutcomes = Surv(clinicalDf$`Survival_days_capped.`, clinicalDf$Censored)

# Fit CoxPh models on all cell relationships.
ClassifyR::colCoxTests(interactions, survivalOutcomes) |> 
  arrange(p.value) |> 
  round(4) |> 
  DT::datatable( options = list(scrollX = TRUE))
# Selecting the most significant relationship
relationship = interactions$Keratin.Tumour__other.immune

# Splitting the values by median relationship
relationship = ifelse(relationship > median(relationship), "Attraction", "Avoidance")
    
# Plotting Kaplan-Meier curve
survfit2(survivalOutcomes ~ relationship) |>
    ggsurvfit() +
    add_pvalue() +
    ggtitle("Keratin+Tumour__Other immune")

Classification

# Calculate proportions of regions in each image
regionProp <- spicyR::getProp(kerenSCE, 
                       feature = "region",
                       imageID = "imageID")

# Get the average expression of a marker in each cell type in each region
cellTypeRegionMeans <- Statial::getMarkerMeans(kerenSCE,
                              imageID = "imageID",
                              cellType = "cellType",
                              region = "region")

# Group all features in one vector
featureList = list(
  "Cell type proportions" = cellProp,
  "Region proportions" = regionProp,
  "Marker means" = cellTypeRegionMeans,
  "Spatial interactions" = interactions
)



# Training a CoxNet survival model 
CV = ClassifyR::crossValidate(
  measurements = featureList,
  outcome = survivalOutcomes,
  classifier = "CoxNet",
  selectionMethod = "CoxPH",
  nFolds = 5,
  nFeatures = 10,
  nRepeats = 20
)
ClassifyR::performancePlot(CV, metric = "C-index",
                characteristicsList = list(x = "auto", fillColour = "Assay Name")) +
  theme(legend.position = "none") +
  scale_x_discrete(guide = guide_axis(angle = 45)) +
  theme(axis.text.x = element_text(size = 7)) +
  scale_fill_tableau()